#Join the data boards
I loaded september to october data boards from 2017 - 2023 to represent the freshers season then I merged the health boards #I summarise total antidepressant prescriptions per Freshers year and plotted the graph to see the trend #Looked at pre coviid , during covid and after covid trend to see if there has been any impact or association
library(tidyverse)
library(here) # directory stucture
library(gt) # tables
library(janitor) # cleaning data
library(ggplot2) # plotting graph
library(sf) # to read in map data
library(readxl) # to read in map data
library(plotly) # to make interactive
library(viridis)
library(sf)
loading a large amount of data in a shorter time period by downloading and using the mapdfr function (data from 2017-2023)
files <- list.files(here("data", "winter_data"), pattern = "csv")
winter_data <- files %>%
map_dfr(~read_csv(here("data", "winter_data", .))) %>%
clean_names()
clean up data and filter for the sections you want
filtered_winter_data <- winter_data %>%
filter(str_starts(bnf_item_code,"0403")) %>% #antidepressant code is 0403
mutate(year = as.numeric(substr(paid_date_month,1,4)), month = as.numeric(substr(paid_date_month,5,6))) %>% #separates the date into years and month so that i can group winter sections
mutate(winter_year=case_when(month == 12 ~ year + 1,
month %in% c(1,2) ~ year) )#makes a new column to group the winter years
filtered_winter_data <- filtered_winter_data %>%
unite("healthboards",hbt2014,hbt,sep = "_")#so some of my data healthboard codes were under the name hbt_2014 AND another was hbt so i had to merge the column so all the healthboard columns fall under one
filtered_winter_data$healthboards <- gsub("[NA]","",filtered_winter_data$healthboards)
filtered_winter_data$healthboards <-
gsub("_","",filtered_winter_data$healthboards)#had to remove some NA characters and '_' characters
Graph 1
winter_years_data <- filtered_winter_data %>%
group_by(winter_year) %>%
summarise(total_items=sum(number_of_paid_items,na.rm = TRUE))
plot <- ggplot(winter_years_data,aes(x=winter_year,y=total_items)) +
geom_line(linewidth=0.7,colour = "blue") +
geom_point(size=2)+
scale_x_continuous(breaks=2017:2023) +
labs(title="Antidepressant Prescriptions During Winter Season",x="Year",y="Total Antidepressant Prescriptions") +
theme_minimal()
ggplotly(plot)
print(plot)
#write a code talking about the zoomed in changes and reference why you dudnt go from 0
#ROUGH
population <- readxl::read_excel(here("data","population.xlsx"), skip=10) %>%
clean_names() %>%
select(x2,all_people) %>%
filter(!is.na(all_people)) %>%
rename(h_bname = "x2",hb_population = "all_people") %>%
filter(!str_detect(hb_population,"Cells"))
filtered2_winter_data <- filtered_winter_data %>%
group_by(healthboards,bnf_item_code,winter_year,gp_practice) %>%
summarise(total_paid = sum(paid_quantity, na.rm =TRUE))
retry reading in data
filtered3_winter_data <- filtered_winter_data %>%
group_by(healthboards,winter_year,paid_date_month,gp_practice) %>%
summarise(paid_quantity = sum(paid_quantity,na.rm=TRUE))
SIMD <- readxl::read_excel(here("data","SIMD.xlsx")) %>%
clean_names()
combined_pop_simd <- SIMD %>%
full_join(filtered3_winter_data,join_by(h_bcode == healthboards)) %>%
left_join(population,join_by(h_bname == h_bname))
retry making map
combined_pop_simd2 <- combined_pop_simd %>%
group_by(winter_year,h_bname,h_bcode) %>%
summarise(quantity_per_head = sum(paid_quantity)/mean(hb_population)) %>%
filter(!is.na(h_bname))
### map
NHS_healthboards <- st_read(here( "data", "Week6_NHS_HealthBoards_2019.shp")) %>%
clean_names() %>%
rename(h_bname = hb_name)
## Reading layer `Week6_NHS_healthboards_2019' from data source
## `/Users/olufimihanfaturoti/Year 3 Medicine/data-science/B251495/data/Week6_NHS_healthboards_2019.shp'
## using driver `ESRI Shapefile'
## Simple feature collection with 14 features and 4 fields
## Geometry type: MULTIPOLYGON
## Dimension: XY
## Bounding box: xmin: 7564.996 ymin: 530635.8 xmax: 468754.8 ymax: 1218625
## Projected CRS: OSGB36 / British National Grid
# Join spatial data with falls_admissions_75_summary
mapped_data <- combined_pop_simd2 %>%
full_join(NHS_healthboards,by="h_bname") %>%
st_as_sf()
#CLAUDE :
library(ggiraph)
plot_map <- mapped_data %>%
ggplot() +
geom_sf_interactive( # Changed from geom_sf
aes(fill = quantity_per_head,
tooltip = paste0(h_bname,
"\nWinter Year: ", winter_year,
"\nQuantity per Head: ", round(quantity_per_head, 2))),
colour = "white",
size = 0.1
) +
scale_fill_distiller(palette = "Blues", direction = 1,
name = "Items per Head") +
labs(
title = "Antidepressant Prescriptions per Head",
subtitle = "By Health Board and Winter Year"
) +
facet_wrap(~ winter_year) +
theme_void() +
theme(
strip.text = element_text(size = 12, face = "bold"),
plot.title = element_text(face = "bold", size = 16),
plot.subtitle = element_text(size = 10)
)
interactive_map <- girafe(ggobj = plot_map) # Changed from ggplotly
interactive_map
retry making boxplot
files <- list.files(here("data", "winter_population"), pattern = "csv")
winter_population <- files %>%
map_dfr(~read_csv(here("data", "winter_population", .))) %>%
clean_names()
summary_combined <- combined_pop_simd %>%
group_by(simd2020v2_quintile,h_bname,gp_practice,winter_year) %>%
summarise(avg_paid_over_winters = mean(sum(paid_quantity))) %>%
filter(!is.na(simd2020v2_quintile))
files <- list.files(here("data", "winter_population"), pattern = "csv")
winter_population <- files %>%
map_dfr(~read_csv(here("data", "winter_population", .))) %>%
clean_names()
filtered_winter_population <- winter_population %>%
select(date,practice_code,hb,all_ages) %>%
mutate(year = as.numeric(substr(date,1,4))) %>%
rename(healthboards = hb) %>%
rename(winter_year = year)
filtered_winter_population$winter_year <- gsub("2020","2019",filtered_winter_population$winter_year)
filtered_winter_population$winter_year <- gsub("2023","2022",filtered_winter_population$winter_year)
summary_winter_with_simd <- summary_combined %>%
filter(winter_year %in% c("2019","2022")) %>%
rename(practice_code = gp_practice) %>%
rename(winter_year = winter_year)
summary_winter_with_simd <- summary_winter_with_simd %>%
mutate(winter_year = as.numeric(winter_year))
filtered_winter_population <- filtered_winter_population %>%
mutate(winter_year = as.numeric(winter_year))
joined_data <- summary_winter_with_simd %>%
full_join(filtered_winter_population,join_by (winter_year == winter_year, practice_code == practice_code))
final_boxplot <- joined_data %>%
group_by(simd2020v2_quintile,h_bname,practice_code,winter_year) %>%
reframe(avg_per_1000=(avg_paid_over_winters/all_ages)*1000,
year_label=factor(winter_year,
level = c(2019,2022),
labels = c("Pre-COVID (2019)","Post-COVID (2022)")))
#chat
hb_points <-final_boxplot %>%
group_by(simd2020v2_quintile,h_bname) %>% summarise(hb_avg = mean(avg_per_1000, na.rm = TRUE))
#chat
final_boxplot <- final_boxplot %>%
mutate(simd2020v2_quintile = factor(simd2020v2_quintile))
hb_points <- hb_points %>%
mutate(simd2020v2_quintile = factor(simd2020v2_quintile))
box2 <- final_boxplot %>%
ggplot(aes(x = factor(simd2020v2_quintile), y = avg_per_1000,
fill = factor(simd2020v2_quintile))) +
geom_boxplot(outlier.shape = NA) + # hide boxplot outliers
geom_point(data = hb_points,
aes(x = factor(simd2020v2_quintile), y = hb_avg,
text = paste0("Health Board: ", h_bname, "<br>",
"SIMD: ", simd2020v2_quintile, "<br>",
"Avg per 1000: ", round(hb_avg, 1))),
color = "red",
size = 2,
alpha = 0.7,
position = position_jitter(width = 0.2)) +
scale_fill_viridis(discrete = TRUE, alpha = 0.6) +
scale_y_continuous(labels = scales::label_comma()) +
coord_cartesian(ylim = c(5000, 20000000)) +
theme_minimal() +
theme(legend.position = "none") +
facet_wrap(~winter_year) +
labs(title = "Average Winter Antidepressant Prescriptions Per Practice by SIMD Quintile",
x = "SIMD Quintile (1 = Most Deprived)",
y = "Avg prescriptions per 1000")
# Interactive plot
ggplotly(box2, tooltip = "text")
#just boxplot
box2 <- final_boxplot %>%
ggplot(aes(x=simd2020v2_quintile,y=avg_per_1000, fill= factor(simd2020v2_quintile)))+
geom_boxplot() + geom_point(data = hb_points,
aes(x = factor(simd2020v2_quintile),
y = hb_avg),
color = "red",
size = 2,
alpha = 0.7,
position = position_jitter(width = 0.2)) +
scale_fill_viridis(discrete = TRUE, alpha=0.6)+
scale_y_continuous(labels = scales::label_comma()) +
coord_cartesian(ylim = c(5000,20000000))+
geom_jitter_interactive(color = "lightblue",size=0.4,alpha=0.9)+
theme_minimal()+
theme(legend.position = "none")+
facet_wrap(~winter_year) +
labs(title = "Average Winter Antidepressant Prescriptions Per by SIMD Quintile",
x = "SIMD Quintile (1 = Most Deprived)",
y = "Avg prescriptions per 1000")
ggplotly(box2)
retry lollipop
lollipop_data <- combined_pop_simd %>%
filter(winter_year %in% c("2019","2022")) %>%
group_by(simd2020v2_quintile,h_bname,winter_year) %>%
summarise(total_paid = sum(paid_quantity,na.rm=TRUE),avg_SIMD = mean(simd2020v2_quintile, na.rm=TRUE)) %>%
mutate(period=ifelse(winter_year==2019, "pre", "post"))
#cht
lollipop1_summary <- lollipop_data %>%
# Step 1: Aggregate prescribing for each HB × period
group_by(h_bname, period) %>%
summarise(
total_prescribing = sum(total_paid, na.rm = TRUE),
avg_simd = mean(simd2020v2_quintile, na.rm = TRUE),
.groups = "drop"
) %>%
# Step 2: Remove HBs with missing names
filter(!is.na(h_bname)) %>%
# Step 3: Pivot pre/post
tidyr::pivot_wider(
names_from = period,
values_from = total_prescribing
) %>%
# Step 4: Calculate percentage change
mutate(
pct_change = ((post - pre) / pre) * 100
) %>%
# Step 5: Rank health boards by average SIMD
arrange(avg_simd) %>%
mutate(h_bname = factor(h_bname, levels = unique(h_bname)))
pct_lollipop <-lollipop1_summary %>%
ggplot () +
geom_segment(aes(x=h_bname, xend=h_bname, y=0, yend = pct_change), color = "skyblue")+
geom_point(aes(x = h_bname, y = pct_change),color = "blue", size=3, alpha = 0.6) +
theme_light() +
coord_flip() +
theme(panel.grid.major.y = element_blank(),
panel.border = element_blank(),
axis.ticks.y = element_blank()) +
labs( title = "Percentage Change in Prescribing per Health Board",
x = "Health Board",
y = "Percentage Change (%)")
ggplotly(pct_lollipop)
questions : not sure the best way to displa my original data ? github - how to get rid of the signs 1- overall national trend (use original graph) 2- i want to show the variation between different regions using healthboards 3- link it to deprevation and look at prescriptions per person 4- can i do a map that shows pre covid and post covid side by side would that count as one 5- voilin plot across differet SIMDs to compare smaller unit of data - gp practice (postcode that links to SIMD) PATCHWORK - MAPS TOGETHETE reference line to show the split between precovid, covid and postcovid
every dot is a gp practice - gp practice - dataset - adressess (assessment prep) voilin plot if messy add transperency open data use quintiles for voilin plot do a code that says if not installed install and load packages
percentage increase
overall trend map antidepressant prescribing per head by healthboard facet by winter year? boxplot by SIMD dumbell plot / lollipop graph - percentage change write where the links are from